In [ ]:
import matplotlib.pyplot as plt
import geopandas as gpd
In [ ]:
import warnings
# warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
In [ ]:
!pip install r5py
In [ ]:
!pip install pyrosm
In [ ]:
from pyrosm import OSM, get_data
In [ ]:
warsaw_osm = OSM(get_data("warsaw"))
In [ ]:
from r5py import TransportNetwork
# Set the file path to the OSM data
warsaw_osm = '/content/drive/MyDrive/Projekt zaliczeniowy/Warsaw.osm.pbf'
# Initialize the TransportNetwork with OSM and GTFS data
transport_network = TransportNetwork(
warsaw_osm, # OSM data file path as a string
[
'/content/drive/MyDrive/Projekt zaliczeniowy/warsaw.zip' # GTFS file(s)
]
)
In [ ]:
!apt-get -qq install -y libspatialindex-dev
!pip install -q -U osmnx
!pip install folium matplotlib mapclassify
In [ ]:
import osmnx as ox
from shapely.geometry import Point
In [ ]:
address = "PJATK Warszawa"
lat, lon = ox.geocode(address)
origin = gpd.GeoDataFrame({"geometry": [Point(lon, lat)], "name": "PJATK Warszawa", "id": [0]}, index=[0], crs="epsg:4326")
In [ ]:
import geopandas as gpd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go
import pyproj
import folium
BUGGING STUFF¶
In [ ]:
import geopandas as gpd
pop_grid = gpd.read_file('/content/drive/MyDrive/Projekt zaliczeniowy/250^2 IDWx4 grid Waw.gpkg')
pop_grid = pop_grid.rename(columns={'DN': 'zaludnienie',})
pop_grid.head(1)
Out[ ]:
| zaludnienie | geometry | |
|---|---|---|
| 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... |
Wykres gęstość zaludnienia (ekstrapolacja)¶
In [ ]:
f = folium.Figure(width=1200, height=600)
m = pop_grid.explore("zaludnienie", cmap="Greens", max_zoom=13, tiles="Cartodb Positron")
m.add_to(f)
f
Out[ ]:
In [ ]:
points = pop_grid.copy()
points.geometry.set_crs('EPSG:4326', inplace=True)
# points = points.to_crs(crs='EPSG:3857')
points["geometry"] = points.centroid
<ipython-input-14-e73cd62b9592>:10: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. points["geometry"] = points.centroid
In [ ]:
points.to_crs(pyproj.CRS.from_epsg(4326), inplace=True)
In [ ]:
points = points.reset_index()
points = points.rename(columns={"index": "id", })
points.head(1)
Out[ ]:
| id | zaludnienie | geometry | |
|---|---|---|---|
| 0 | 0 | 508 | POINT (20.95278 52.36861) |
In [ ]:
import datetime
from r5py import TravelTimeMatrixComputer, TransportMode
travel_time_matrix_computer = TravelTimeMatrixComputer(
transport_network,
origins=origin,
destinations=points,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.TRANSIT, TransportMode.WALK]
)
/usr/local/lib/python3.10/dist-packages/r5py/r5/regional_task.py:223: RuntimeWarning: Departure time 2024-06-08 18:30:00 is outside of the time range covered by currently loaded GTFS data sets. warnings.warn(
In [ ]:
travel_time_matrix = travel_time_matrix_computer.compute_travel_times()
travel_time_matrix.head(1)
Out[ ]:
| from_id | to_id | travel_time | |
|---|---|---|---|
| 0 | 0 | 0 | 91.0 |
In [ ]:
pop_grid = pop_grid.reset_index()
pop_grid = pop_grid.rename(columns={"index": "id", })
pop_grid.head(1)
Out[ ]:
| id | zaludnienie | geometry | |
|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... |
In [ ]:
travel_time_df = pop_grid.merge(travel_time_matrix, left_on="id", right_on="to_id")
travel_time_df = travel_time_df.rename(columns={"travel_time": "travel_time_TRANSIT", })
travel_time_df.head()
Out[ ]:
| id | zaludnienie | geometry | from_id | to_id | travel_time_TRANSIT | |
|---|---|---|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 91.0 |
| 1 | 1 | 491 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 1 | 89.0 |
| 2 | 2 | 300 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 2 | 87.0 |
| 3 | 3 | 110 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 3 | 79.0 |
| 4 | 4 | 157 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 4 | 91.0 |
Wykres czas podróży do PJATK transportem publicznym¶
In [ ]:
fig, ax = plt.subplots(figsize=(9, 9))
travel_time_df.plot(column="travel_time_TRANSIT", cmap="RdYlBu", scheme="equal_interval", k=13, legend=True, ax=ax)
origin.to_crs(crs=travel_time_df.crs).plot(ax=ax, color="green", markersize=40)
ax.set_title(f"Travel time by public transport to PJATK")
plt.show()
To samo, tylko w folium
In [ ]:
f = folium.Figure(width=1200, height=600)
m = travel_time_df.explore("travel_time_TRANSIT", cmap="RdYlBu", max_zoom=13, tiles="Cartodb Positron")
m = origin.explore(m=m, color="red", marker_kwds={"radius": 10})
m.add_to(f)
f
Out[ ]:
In [ ]:
travel_time_matrix_computer_b = TravelTimeMatrixComputer(
transport_network,
origins=origin,
destinations=points,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.BICYCLE,]
)
In [ ]:
travel_time_matrix = travel_time_matrix_computer_b.compute_travel_times()
travel_time_matrix.head(1)
Out[ ]:
| from_id | to_id | travel_time | |
|---|---|---|---|
| 0 | 0 | 0 | 118.0 |
In [ ]:
travel_time_df = travel_time_df.merge(travel_time_matrix, left_on="id", right_on="to_id")
travel_time_df = travel_time_df.rename(columns={"travel_time": "travel_time_BIKE", })
travel_time_df.head()
Out[ ]:
| id | zaludnienie | geometry | from_id_x | to_id_x | travel_time_TRANSIT | from_id_y | to_id_y | travel_time_BIKE | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 91.0 | 0 | 0 | 118.0 |
| 1 | 1 | 491 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 1 | 89.0 | 0 | 1 | 120.0 |
| 2 | 2 | 300 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 2 | 87.0 | 0 | 2 | 121.0 |
| 3 | 3 | 110 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 3 | 79.0 | 0 | 3 | NaN |
| 4 | 4 | 157 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 4 | 91.0 | 0 | 4 | NaN |
In [ ]:
travel_time_df['difference'] = travel_time_df['travel_time_TRANSIT'] - travel_time_df['travel_time_BIKE']
In [ ]:
travel_time_df['difference'].value_counts().head()
Out[ ]:
difference -2.0 248 -4.0 247 0.0 244 1.0 240 -3.0 239 Name: count, dtype: int64
Wykres różnica między czasem podróży rowerem a transportem publicznym¶
In [ ]:
import folium
f = folium.Figure(width=1200, height=600)
m = travel_time_df.explore("difference", cmap="RdBu_r", max_zoom=13, tiles="Cartodb Positron")
m = origin.explore(m=m, color="red", marker_kwds={"radius": 10})
m.add_to(f)
f
Out[ ]:
Usługi¶
In [ ]:
amenities = ["library"]
query = ["Warsaw, Poland",]
libraries = ox.features_from_place(query, tags={"amenity": amenities})
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
libraries.head()
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. and should_run_async(code)
Out[ ]:
| addr:city | addr:housenumber | addr:postcode | addr:street | amenity | contact:email | contact:phone | name | opening_hours | operator | ... | name:sq | name:sr | name:tr | name:udm | name:uk | name:vi | name:zh | name:zh-Hans | name:zh-Hant | type | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| element_type | osmid | |||||||||||||||||||||
| node | 278609792 | Warszawa | 71 | 02-679 | Zygmunta Modzelewskiego | library | nr84@bpmokotow.waw.pl | +48 22 844 62 71 | Wypożyczalnia dla Dorosłych i Młodzieży Nr 84 | Mo,Tu,Fr 14:00-19:00; We,Th 11:00-16:00 | Biblioteka Publiczna m.st. Warszawy | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 312081254 | Warszawa | 3 | 01-864 | Włodzimierza Perzyńskiego | library | NaN | NaN | Biblioteka Publiczna m.st. Warszawy Dzielnicy ... | Mo,Tu 10:00-16:00; We-Fr 12:00-19:00 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 331106257 | Warszawa | 35 | 02-030 | Grójecka | library | NaN | NaN | Wypożyczalnia dla Dorosłych i Młodzieży Nr 23 | Mo,We,Fr 13:00-19:00; Tu,Th 09:00-15:00; PH off | Biblioteka Publiczna m.st. Warszawy | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 376053555 | NaN | 9/15 | NaN | Marszałkowska | library | NaN | NaN | Wypożyczalnia dla Dorosłych i Młodzieży nr 54 | Mo-Fr 10:00-19:00 | Biblioteka Publiczna w Dzielnicy Śródmieście m... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 410399923 | Warszawa | 17 | 01-381 | Powstańców Śląskich | library | NaN | NaN | Czytelnia nr XVII | Mo-We, Fr 10:00-19:30; Th 10:00-17:00 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 115 columns
In [ ]:
libraries["geometry"] = libraries.centroid
<ipython-input-31-b84e84f7ddfe>:1: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. libraries["geometry"] = libraries.centroid
In [ ]:
ax = libraries.plot()
ax
Out[ ]:
<Axes: >
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
libraries = libraries.reset_index()
libraries["id"] = libraries.index
libraries.head()
Out[ ]:
| element_type | osmid | addr:city | addr:housenumber | addr:postcode | addr:street | amenity | contact:email | contact:phone | name | ... | name:sr | name:tr | name:udm | name:uk | name:vi | name:zh | name:zh-Hans | name:zh-Hant | type | id | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | node | 278609792 | Warszawa | 71 | 02-679 | Zygmunta Modzelewskiego | library | nr84@bpmokotow.waw.pl | +48 22 844 62 71 | Wypożyczalnia dla Dorosłych i Młodzieży Nr 84 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 1 | node | 312081254 | Warszawa | 3 | 01-864 | Włodzimierza Perzyńskiego | library | NaN | NaN | Biblioteka Publiczna m.st. Warszawy Dzielnicy ... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
| 2 | node | 331106257 | Warszawa | 35 | 02-030 | Grójecka | library | NaN | NaN | Wypożyczalnia dla Dorosłych i Młodzieży Nr 23 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 3 | node | 376053555 | NaN | 9/15 | NaN | Marszałkowska | library | NaN | NaN | Wypożyczalnia dla Dorosłych i Młodzieży nr 54 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3 |
| 4 | node | 410399923 | Warszawa | 17 | 01-381 | Powstańców Śląskich | library | NaN | NaN | Czytelnia nr XVII | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4 |
5 rows × 118 columns
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
travel_time_matrix_computer = TravelTimeMatrixComputer(
transport_network,
origins=libraries,
destinations=points,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.TRANSIT, TransportMode.WALK],
)
travel_time_matrix = travel_time_matrix_computer.compute_travel_times()
/usr/local/lib/python3.10/dist-packages/r5py/r5/regional_task.py:223: RuntimeWarning: Departure time 2024-06-08 18:30:00 is outside of the time range covered by currently loaded GTFS data sets. warnings.warn(
In [ ]:
travel_time_matrix.shape
Out[ ]:
(1870917, 3)
In [ ]:
# Find out the travel time to closest library
closest_library = travel_time_matrix.groupby("to_id")["travel_time"].min().reset_index()
closest_library = closest_library.rename(columns={'travel_time':'tt_to_closest_library'})
print(closest_library.shape)
closest_library.head()
(2261, 2)
Out[ ]:
| to_id | tt_to_closest_library | |
|---|---|---|
| 0 | 0 | 26.0 |
| 1 | 1 | 26.0 |
| 2 | 2 | 10.0 |
| 3 | 3 | 14.0 |
| 4 | 4 | 14.0 |
In [ ]:
travel_time_df = travel_time_df.merge(closest_library, left_on="id", right_on='to_id')
In [ ]:
travel_time_df.head(3)
Out[ ]:
| id | zaludnienie | geometry | from_id_x | to_id_x | travel_time_TRANSIT | from_id_y | to_id_y | travel_time_BIKE | difference | to_id | tt_to_closest_library | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 91.0 | 0 | 0 | 118.0 | -27.0 | 0 | 51.0 |
| 1 | 1 | 491 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 1 | 89.0 | 0 | 1 | 120.0 | -31.0 | 1 | 56.0 |
| 2 | 2 | 300 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 2 | 87.0 | 0 | 2 | 121.0 | -34.0 | 2 | 54.0 |
BIBLIOTEKI - Wykres czas podróży do najbliższej biblioteki¶
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
fig, ax = plt.subplots(figsize=(9, 9))
travel_time_df.plot(column="tt_to_closest_library", cmap="RdYlBu", scheme="natural_breaks", k=12, legend=True, ax=ax)
origin.to_crs(crs=travel_time_df.crs).plot(ax=ax, color="green", markersize=40)
ax.set_title(f"Travel time to closest libary in minutes.");
plt.show()
Wykresy po dzielnicy - spatial join¶
In [ ]:
distr = gpd.read_file('/content/drive/MyDrive/Projekt zaliczeniowy/Mieszkania Warszawy/dzielnice_Warszawy_pack_WGS84.gpkg')
# distr = distr.rename(columns={'DN': 'zaludnienie',})
distr.head(1)
Out[ ]:
| nazwa_dzie | geometry | |
|---|---|---|
| 0 | Żoliborz | MULTIPOLYGON (((20.95755 52.26693, 20.95760 52... |
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
fig, ax = plt.subplots(figsize=(9, 9))
travel_time_df.plot(column="tt_to_closest_library", cmap="RdYlBu", scheme="natural_breaks", k=12, legend=True, ax=ax)
origin.to_crs(crs=travel_time_df.crs).plot(ax=ax, color="green", markersize=40)
ax.set_title(f"Travel time to closest libary in minutes.");
distr.boundary.plot(color='#333333', linewidth=1, ax=ax)
plt.show()
Lepiej coś takiego zrobić w QGIS-ie
Test z ilością restauracji w zasięgu 15 minut¶
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
amenities = ['restaurant', 'pub', 'bar']
query = ["Warsaw, Poland",]
restaurants = ox.features_from_place(query, tags={"amenity": amenities[0]})
pubs = ox.features_from_place(query, tags={"amenity": amenities[1]})
bars = ox.features_from_place(query, tags={"amenity": amenities[2]})
In [ ]:
print(restaurants.shape)
(1935, 198)
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. and should_run_async(code)
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
print(restaurants.shape)
display(restaurants.head(3))
print(pubs.shape)
display(pubs.head(3))
print(bars.shape)
display(bars.head(3))
(1935, 198)
| addr:city | addr:housenumber | addr:street | amenity | geometry | addr:postcode | check_date:opening_hours | cuisine | name | opening_hours | ... | source:building | old_addr:housenumber | wikidata | drive_in | nohousenumber | addr:source | building:part | roof:levels | roof:orientation | building:material | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| element_type | osmid | |||||||||||||||||||||
| node | 31005854 | Warszawa | 110 | Aleja Krakowska | restaurant | POINT (20.94595 52.17691) | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 201830837 | Warszawa | 37 | Barska | restaurant | POINT (20.97434 52.21898) | 02-315 | 2023-06-30 | pizza | Pizzeria na Barskiej | Mo-Su 12:00-22:00 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 247441607 | Warszawa | 46 | Aleja Jerzego Waszyngtona | restaurant | POINT (21.06198 52.23995) | 03-910 | 2022-08-22 | pizza | Chicago's Pizza | Mo-Th 11:00-23:00; Fr-Sa 11:00-24:00; Su 11:00... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 198 columns
(152, 77)
| addr:city | addr:country | addr:housenumber | addr:postcode | addr:street | amenity | check_date | name | opening_hours | ... | microbrewery | shop | source:addr | survey:date | source | mobile | nodes | building | building:levels | height | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| element_type | osmid | |||||||||||||||||||||
| node | 248569875 | Warszawa | PL | 177 | 02-555 | Aleja Niepodległości | pub | 2024-03-23 | pub@zielonages.pl | Zielona Gęś | 12:00+ | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 261659066 | NaN | NaN | 54 | NaN | Aleja Komisji Edukacji Narodowej | pub | NaN | NaN | Klubokawiarnia KEN 54 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 280333661 | NaN | NaN | NaN | NaN | NaN | pub | NaN | NaN | Bez Słowa | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 77 columns
(294, 105)
| addr:city | addr:housenumber | addr:street | amenity | check_date | description | name | opening_hours | outdoor_seating | ... | heritage | heritage:operator | historic | motorcycle | motorcycle:theme | old_name | ref:gez | theme | indoor | ref | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| element_type | osmid | |||||||||||||||||||||
| node | 248197975 | Warszawa | 4 | Henryka Sienkiewicza | bar | 2023-09-05 | Multitap bar | thetapswarsaw | The Taps | Mo-We, Su 15:00-00:00; Th 15:00-01:00; Fr, Sa ... | yes | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 302711831 | NaN | NaN | NaN | bar | 2024-04-05 | NaN | NaN | Inspector Lounge | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 306550336 | NaN | 5 | Chmielna | bar | 2022-07-22 | NaN | NaN | Zakątek | NaN | yes | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 105 columns
Sprawdzenie czy są powtórzenia:
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
rest_reset = restaurants.reset_index()
pubs_reset = pubs.reset_index()
bars_reset = bars.reset_index()
In [ ]:
overlap_rest_pubs = rest_reset['osmid'][rest_reset['osmid'].isin(pubs_reset['osmid'])]
In [ ]:
overlap_rest_bars = rest_reset['osmid'][rest_reset['osmid'].isin(bars_reset['osmid'])]
In [ ]:
overlap_bars_pubs = pubs_reset['osmid'][pubs_reset['osmid'].isin(bars_reset['osmid'])]
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
display(overlap_rest_pubs)
display(overlap_rest_bars)
display(overlap_bars_pubs)
Series([], Name: osmid, dtype: int64)
Series([], Name: osmid, dtype: int64)
Series([], Name: osmid, dtype: int64)
Wygląda na to, że nie ma powtórzeń.
In [ ]:
import numpy as np
length = bars.shape[0]
print(f'Total records: {length}')
try:
for i in np.array(bars.columns.tolist()).reshape(15,7):
hd = bars[i].head(3)
nans = [bars[j].isna().sum() for j in hd.columns]
# nans_perc = [(str(int(round(df[j].isna().sum() / length * 100, 4))) + '%') for j in hd.columns]
nans_perc = [ round(bars[j].isna().sum() / length, 3) * 100 for j in hd.columns ]
# nans_perc = [round(df[j].isna().sum() / length, 3) * 100 for j in hd.columns]
uniques = [bars[j].nunique() for j in hd.columns]
typ = [ bars[bars[j].notnull()][j].dtype for j in hd.columns]
# print(typ)
hd.loc[len(hd)] = typ
hd.loc[len(hd)] = nans
hd.loc[len(hd)] = nans_perc
hd.loc[len(hd)] = uniques
hd.loc[len(hd)-3] = hd.loc[len(hd)-3].astype('int')
hd.loc[len(hd)-1] = hd.loc[len(hd)-1].astype('int')
hd = hd.rename(index={int(len(hd)-4): 'Data type',
int(len(hd)-3): 'Total NaN',
int(len(hd)-2): '% NaN',
int(len(hd)-1): 'Unique values',
})
display(hd)
print(' ')
except:
print('Coś nie pykło')
Total records: 294
| addr:city | addr:housenumber | addr:street | amenity | check_date | description | ||
|---|---|---|---|---|---|---|---|
| (node, 248197975) | Warszawa | 4 | Henryka Sienkiewicza | bar | 2023-09-05 | Multitap bar | thetapswarsaw |
| (node, 302711831) | NaN | NaN | NaN | bar | 2024-04-05 | NaN | NaN |
| (node, 306550336) | NaN | 5 | Chmielna | bar | 2022-07-22 | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 212 | 145 | 139 | 0 | 137 | 289 | 279 |
| % NaN | 72.1 | 49.3 | 47.3 | 0.0 | 46.6 | 98.3 | 94.9 |
| Unique values | 2 | 76 | 70 | 1 | 79 | 3 | 15 |
| name | opening_hours | outdoor_seating | phone | toilets:wheelchair | wheelchair | geometry | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | The Taps | Mo-We, Su 15:00-00:00; Th 15:00-01:00; Fr, Sa ... | yes | +48601707407 | yes | limited | POINT (21.0125356 52.2344797) |
| (node, 302711831) | Inspector Lounge | NaN | NaN | NaN | NaN | NaN | POINT (21.0064856 52.2361758) |
| (node, 306550336) | Zakątek | NaN | yes | NaN | NaN | no | POINT (21.0182611 52.2328789) |
| Data type | object | object | object | object | object | object | geometry |
| Total NaN | 12 | 207 | 200 | 258 | 291 | 150 | 0 |
| % NaN | 4.1 | 70.4 | 68.0 | 87.8 | 99.0 | 51.0 | 0.0 |
| Unique values | 273 | 83 | 3 | 36 | 2 | 3 | 294 |
| level | addr:postcode | website | addr:city:en | note | access | ||
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | -1 | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 260 | 218 | 280 | 236 | 292 | 293 | 292 |
| % NaN | 88.4 | 74.1 | 95.2 | 80.3 | 99.3 | 99.7 | 99.3 |
| Unique values | 5 | 40 | 14 | 56 | 1 | 1 | 2 |
| addr:floor | aeroway | leisure | indoor_seating | cuisine | alt_name | brewery | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 289 | 292 | 290 | 283 | 286 | 288 | 284 |
| % NaN | 98.3 | 99.3 | 98.6 | 96.3 | 97.3 | 98.0 | 96.6 |
| Unique values | 3 | 1 | 3 | 1 | 5 | 6 | 6 |
| payment:cash | payment:credit_cards | payment:debit_cards | contact:facebook | internet_access | smoking | addr:city:simc | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 292 | 289 | 291 | 286 | 288 | 283 | 287 |
| % NaN | 99.3 | 98.3 | 99.0 | 97.3 | 98.0 | 96.3 | 97.6 |
| Unique values | 1 | 1 | 1 | 8 | 2 | 3 | 2 |
| source:addr | opening_hours:signed | addr:housename | internet_access:fee | brand | brand:website | brand:wikidata | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 288 | 287 | 291 | 293 | 286 | 291 | 287 |
| % NaN | 98.0 | 97.6 | 99.0 | 99.7 | 97.3 | 99.0 | 97.6 |
| Unique values | 2 | 1 | 2 | 1 | 3 | 1 | 2 |
| brand:wikipedia | entrance | operator | name:pl | check_date:opening_hours | name:en | sport | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 291 | 293 | 292 | 289 | 290 | 286 | 292 |
| % NaN | 99.0 | 99.7 | 99.3 | 98.3 | 98.6 | 97.3 | 99.3 |
| Unique values | 1 | 1 | 2 | 5 | 3 | 6 | 2 |
| website_1 | official_name | source | hookah | short_name | diet:vegetarian | payment:american_express | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 293 | 292 | 221 | 293 | 293 | 293 | 293 |
| % NaN | 99.7 | 99.3 | 75.2 | 99.7 | 99.7 | 99.7 | 99.7 |
| Unique values | 1 | 2 | 4 | 1 | 1 | 1 | 1 |
| payment:apple_pay | payment:blik | payment:diners_club | payment:discover_card | payment:google_pay | payment:jcb | payment:maestro | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 293 | 293 | 293 | 293 | 293 | 293 | 293 |
| % NaN | 99.7 | 99.7 | 99.7 | 99.7 | 99.7 | 99.7 | 99.7 |
| Unique values | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| payment:mastercard | payment:mastercard_contactless | payment:unionpay | payment:visa | payment:visa_contactless | payment:visa_electron | reservation | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 292 | 293 | 293 | 292 | 293 | 293 | 292 |
| % NaN | 99.3 | 99.7 | 99.7 | 99.3 | 99.7 | 99.7 | 99.3 |
| Unique values | 1 | 1 | 1 | 1 | 1 | 1 | 2 |
| name:ru | name:uk | start_date | access:covid19 | public_transport | shop | lgbtq | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 293 | 292 | 292 | 293 | 293 | 288 | 293 |
| % NaN | 99.7 | 99.3 | 99.3 | 99.7 | 99.7 | 98.0 | 99.7 |
| Unique values | 1 | 1 | 2 | 1 | 1 | 1 | 1 |
| noname | contact:email | contact:phone | min_age | wine | contact:instagram | air_conditioning | |
|---|---|---|---|---|---|---|---|
| (node, 248197975) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 302711831) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| (node, 306550336) | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Data type | object | object | object | object | object | object | object |
| Total NaN | 291 | 293 | 293 | 292 | 293 | 293 | 293 |
| % NaN | 99.0 | 99.7 | 99.7 | 99.3 | 99.7 | 99.7 | 99.7 |
| Unique values | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
Coś nie pykło
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
restaurants["geometry"] = restaurants.centroid
bars["geometry"] = bars.centroid
pubs["geometry"] = pubs.centroid
<ipython-input-49-283dd92223be>:4: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. restaurants["geometry"] = restaurants.centroid <ipython-input-49-283dd92223be>:5: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. bars["geometry"] = bars.centroid <ipython-input-49-283dd92223be>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. pubs["geometry"] = pubs.centroid
In [ ]:
bars.shape[0]
Out[ ]:
294
In [ ]:
msize = 8
fig, ax = plt.subplots(figsize=(9, 9))
restaurants.plot(color='#636EFA', label=f'Restauracje ({restaurants.shape[0]})', markersize=msize, ax=ax)
bars.plot(color='#EF553B', label=f'Bary ({bars.shape[0]})', markersize=msize, ax=ax)
pubs.plot(color='#00CC96', label=f'Puby ({pubs.shape[0]})', markersize=msize, ax=ax)
ax.legend()
plt.show()
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
restaurants = restaurants.reset_index()
restaurants["id"] = restaurants.index
bars = bars.reset_index()
bars["id"] = bars.index
pubs = pubs.reset_index()
pubs["id"] = pubs.index
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
restaurants.head()
Out[ ]:
| element_type | osmid | addr:city | addr:housenumber | addr:street | amenity | geometry | addr:postcode | check_date:opening_hours | cuisine | ... | old_addr:housenumber | wikidata | drive_in | nohousenumber | addr:source | building:part | roof:levels | roof:orientation | building:material | id | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | node | 31005854 | Warszawa | 110 | Aleja Krakowska | restaurant | POINT (20.94595 52.17691) | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 1 | node | 201830837 | Warszawa | 37 | Barska | restaurant | POINT (20.97434 52.21898) | 02-315 | 2023-06-30 | pizza | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
| 2 | node | 247441607 | Warszawa | 46 | Aleja Jerzego Waszyngtona | restaurant | POINT (21.06198 52.23995) | 03-910 | 2022-08-22 | pizza | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 3 | node | 247461210 | Warszawa | 26 | Widok | restaurant | POINT (21.01211 52.23124) | 00-023 | NaN | pizza | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3 |
| 4 | node | 248200343 | NaN | 3/5 | Foksal | restaurant | POINT (21.02340 52.23409) | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4 |
5 rows × 201 columns
In [ ]:
def polygon_to_point(geometry):
if geometry.type == 'Polygon':
return geometry.centroid
else:
return geometry
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
restaurants['geometry'] = restaurants['geometry'].apply(polygon_to_point)
/usr/local/lib/python3.10/dist-packages/geopandas/geoseries.py:645: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs) <ipython-input-54-7c5a00dccd47>:2: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead. if geometry.type == 'Polygon':
Travel time matrices - restaurants, pubs, bars¶
In [ ]:
create_matrices = True
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
if create_matrices:
travel_time_matrix_computer = TravelTimeMatrixComputer(
transport_network,
origins=restaurants,
destinations=points,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.TRANSIT, TransportMode.WALK],
)
travel_time_matrix = travel_time_matrix_computer.compute_travel_times()
print(f'Full matrix shape: {travel_time_matrix.shape}')
display(travel_time_matrix.head(3))
cut_tt_matrix = travel_time_matrix.query('travel_time <= 15')
print(f'Cut to 15 minutes matrix shape: {cut_tt_matrix.shape}')
else:
cut_tt_matrix = gpd.read_file('/content/drive/MyDrive/Projekt zaliczeniowy/Matrices/restaurant_travel_time_matrix_CUT15.csv')
/usr/local/lib/python3.10/dist-packages/r5py/r5/regional_task.py:223: RuntimeWarning: Departure time 2024-06-08 18:30:00 is outside of the time range covered by currently loaded GTFS data sets. warnings.warn(
Full matrix shape: (16530705, 3)
| from_id | to_id | travel_time | |
|---|---|---|---|
| 0 | 0 | 0 | 94.0 |
| 1 | 0 | 1 | 95.0 |
| 2 | 0 | 2 | 93.0 |
Cut to 15 minutes matrix shape: (64065, 3)
In [ ]:
# cut_tt_matrix.to_csv('restaurant_travel_time_matrix_CUT15 v2.csv')
travel_time_matrix.to_csv('restaurant_travel_time_matrix v2.csv')
<frozen importlib._bootstrap>:914: ImportWarning: _PyDrive2ImportHook.find_spec() not found; falling back to find_module() <frozen importlib._bootstrap>:914: ImportWarning: _PyDriveImportHook.find_spec() not found; falling back to find_module() <frozen importlib._bootstrap>:914: ImportWarning: _GenerativeAIImportHook.find_spec() not found; falling back to find_module() <frozen importlib._bootstrap>:914: ImportWarning: _OpenCVImportHook.find_spec() not found; falling back to find_module() <frozen importlib._bootstrap>:914: ImportWarning: APICoreClientInfoImportHook.find_spec() not found; falling back to find_module() <frozen importlib._bootstrap>:914: ImportWarning: _BokehImportHook.find_spec() not found; falling back to find_module() <frozen importlib._bootstrap>:914: ImportWarning: _AltairImportHook.find_spec() not found; falling back to find_module()
In [ ]:
n_rest = cut_tt_matrix.groupby('to_id').size()
n_rest
Out[ ]:
to_id
16 1
18 1
19 1
20 1
40 1
..
8425 1
8426 2
8427 1
8428 1
8440 3
Length: 5050, dtype: int64
In [ ]:
travel_time_df_copy = travel_time_df.copy()
In [ ]:
travel_time_df_copy['n_restauracji'] = travel_time_df.index.map(n_rest).fillna(0).astype(int)
In [ ]:
travel_time_df_copy.head(3)
Out[ ]:
| id | zaludnienie | geometry | from_id_x | to_id_x | travel_time_TRANSIT | from_id_y | to_id_y | travel_time_BIKE | difference | to_id | tt_to_closest_library | n_restauracji | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 91.0 | 0 | 0 | 118.0 | -27.0 | 0 | 51.0 | 0 |
| 1 | 1 | 491 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 1 | 89.0 | 0 | 1 | 120.0 | -31.0 | 1 | 56.0 | 0 |
| 2 | 2 | 300 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 2 | 87.0 | 0 | 2 | 121.0 | -34.0 | 2 | 54.0 | 0 |
In [ ]:
travel_time_df_copy['n_restauracji'].value_counts()
Out[ ]:
n_restauracji
0 3493
1 902
2 601
3 484
4 310
...
111 1
209 1
228 1
244 1
49 1
Name: count, Length: 158, dtype: int64
In [ ]:
fig, ax = plt.subplots(figsize=(9, 9))
travel_time_df_copy.plot(column="n_restauracji", scheme="natural_breaks", k=12, legend=True, ax=ax)
ax.set_title(f"Ilość w restauracji w zasięgu 15 minut");
plt.show()
Kontynuacja dla barów i pubów¶
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
bars['geometry'] = bars['geometry'].apply(polygon_to_point)
pubs['geometry'] = pubs['geometry'].apply(polygon_to_point)
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
if create_matrices:
travel_time_matrix_computer = TravelTimeMatrixComputer(
transport_network,
origins=bars,
destinations=points,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.TRANSIT, TransportMode.WALK],
)
travel_time_matrix = travel_time_matrix_computer.compute_travel_times()
/usr/local/lib/python3.10/dist-packages/r5py/r5/regional_task.py:223: RuntimeWarning: Departure time 2024-06-08 18:30:00 is outside of the time range covered by currently loaded GTFS data sets. warnings.warn(
In [ ]:
travel_time_matrix.shape
Out[ ]:
(2511642, 3)
In [ ]:
travel_time_matrix.to_csv('bars_travel_time_matrix.csv')
In [ ]:
cut_tt_matrix = travel_time_matrix.query('travel_time <= 15')
In [ ]:
cut_tt_matrix.to_csv('bars_travel_time_matrix_CUT15.csv')
In [ ]:
n_bars = cut_tt_matrix.groupby('to_id').size()
In [ ]:
n_bars
Out[ ]:
to_id
742 1
745 1
749 1
792 1
793 1
..
8103 2
8104 2
8105 2
8106 2
8151 1
Length: 1940, dtype: int64
In [ ]:
travel_time_df_copy['n_barow'] = travel_time_df.index.map(n_bars).fillna(0).astype(int)
In [ ]:
travel_time_df_copy.head(1)
Out[ ]:
| id | zaludnienie | geometry | from_id_x | to_id_x | travel_time_TRANSIT | from_id_y | to_id_y | travel_time_BIKE | difference | to_id | tt_to_closest_library | n_restauracji | n_barow | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 91.0 | 0 | 0 | 118.0 | -27.0 | 0 | 51.0 | 0 | 0 |
In [ ]:
fig, ax = plt.subplots(figsize=(9, 9))
travel_time_df_copy.plot(column="n_barow", scheme="natural_breaks", k=12, legend=True, ax=ax)
ax.set_title(f"Ilość w barów w zasięgu 15 minut");
plt.show()
In [ ]:
warnings.filterwarnings('ignore', category=DeprecationWarning)
if create_matrices:
travel_time_matrix_computer = TravelTimeMatrixComputer(
transport_network,
origins=pubs,
destinations=points,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.TRANSIT, TransportMode.WALK],
)
travel_time_matrix = travel_time_matrix_computer.compute_travel_times()
/usr/local/lib/python3.10/dist-packages/r5py/r5/regional_task.py:223: RuntimeWarning: Departure time 2024-06-08 18:30:00 is outside of the time range covered by currently loaded GTFS data sets. warnings.warn(
In [ ]:
travel_time_matrix.shape
Out[ ]:
(1298536, 3)
In [ ]:
travel_time_matrix.to_csv('pubs_travel_time_matrix.csv')
In [ ]:
cut_tt_matrix = travel_time_matrix.query('travel_time <= 15')
In [ ]:
cut_tt_matrix.to_csv('pubs_travel_time_matrix_CUT15.csv')
In [ ]:
n_pubs = cut_tt_matrix.groupby('to_id').size()
In [ ]:
travel_time_df_copy['n_pubow'] = travel_time_df.index.map(n_pubs).fillna(0).astype(int)
In [ ]:
travel_time_df_copy.head(1)
Out[ ]:
| id | zaludnienie | geometry | from_id_x | to_id_x | travel_time_TRANSIT | from_id_y | to_id_y | travel_time_BIKE | difference | to_id | tt_to_closest_library | n_restauracji | n_barow | n_pubow | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 91.0 | 0 | 0 | 118.0 | -27.0 | 0 | 51.0 | 0 | 0 | 0 |
In [ ]:
fig, ax = plt.subplots(figsize=(9, 9))
travel_time_df_copy.plot(column="n_pubow", scheme="natural_breaks", k=12, legend=True, ax=ax)
ax.set_title(f"Ilość w pubów w zasięgu 15 minut");
plt.show()
In [ ]:
travel_time_df_copy.to_csv('travel_time_df_copy.csv')
In [ ]:
travel_time_df_copy['n_gastro'] = travel_time_df_copy['n_restauracji'] + travel_time_df_copy['n_barow'] + travel_time_df_copy['n_pubow']
In [ ]:
fig, ax = plt.subplots(figsize=(9, 9))
travel_time_df_copy.plot(column="n_gastro", scheme="natural_breaks", k=12, legend=True, ax=ax)
ax.set_title(f"Suma restauracji, pubów i barów w zasięgu 15 minut");
plt.show()
Dominujące gastro¶
In [ ]:
travel_time_df_copy.head()
Out[ ]:
| id | zaludnienie | geometry | from_id_x | to_id_x | travel_time_TRANSIT | from_id_y | to_id_y | travel_time_BIKE | difference | to_id | tt_to_closest_library | n_restauracji | n_barow | n_pubow | n_gastro | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 508 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 91.0 | 0 | 0 | 118.0 | -27.0 | 0 | 51.0 | 0 | 0 | 0 | 0 |
| 1 | 1 | 491 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 1 | 89.0 | 0 | 1 | 120.0 | -31.0 | 1 | 56.0 | 0 | 0 | 0 | 0 |
| 2 | 2 | 300 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 2 | 87.0 | 0 | 2 | 121.0 | -34.0 | 2 | 54.0 | 0 | 0 | 0 | 0 |
| 3 | 3 | 110 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 3 | 79.0 | 0 | 3 | NaN | NaN | 3 | 46.0 | 0 | 0 | 0 | 0 |
| 4 | 4 | 157 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 4 | 91.0 | 0 | 4 | NaN | NaN | 4 | 37.0 | 0 | 0 | 0 | 0 |
In [ ]:
gastro_df = travel_time_df_copy[['id', 'geometry', 'n_restauracji', 'n_barow', 'n_pubow', 'n_gastro']].copy()
gastro_df
Out[ ]:
| id | geometry | n_restauracji | n_barow | n_pubow | n_gastro | |
|---|---|---|---|---|---|---|
| 0 | 0 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 0 | 0 |
| 1 | 1 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 0 | 0 | 0 |
| 2 | 2 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 0 | 0 | 0 |
| 3 | 3 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 0 | 0 | 0 |
| 4 | 4 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... |
| 8538 | 8538 | POLYGON ((21.07731 52.10113, 21.07721 52.09889... | 0 | 0 | 0 | 0 |
| 8539 | 8539 | POLYGON ((21.08094 52.10107, 21.08084 52.09883... | 0 | 0 | 0 | 0 |
| 8540 | 8540 | POLYGON ((21.08457 52.10100, 21.08447 52.09877... | 0 | 0 | 0 | 0 |
| 8541 | 8541 | POLYGON ((21.07721 52.09889, 21.07711 52.09666... | 0 | 0 | 0 | 0 |
| 8542 | 8542 | POLYGON ((21.08084 52.09883, 21.08074 52.09659... | 0 | 0 | 0 | 0 |
8543 rows × 6 columns
In [ ]:
gastro_df['rest_%'] = gastro_df['n_restauracji'] / gastro_df['n_gastro']
gastro_df['bars_%'] = gastro_df['n_barow'] / gastro_df['n_gastro']
gastro_df['pubs_%'] = gastro_df['n_pubow'] / gastro_df['n_gastro']
In [ ]:
gastro_df = gastro_df.fillna(0)
In [ ]:
gastro_df = gastro_df.rename(columns={
'n_restauracji':'n_rest',
'n_barow':'n_bars',
'n_pubow':'n_pubs',
})
In [ ]:
gastro_df.head()
Out[ ]:
| id | geometry | n_rest | n_bars | n_pubs | n_gastro | rest_% | bars_% | pubs_% | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 |
| 1 | 1 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 |
| 2 | 2 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 |
| 3 | 3 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 |
| 4 | 4 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 |
In [ ]:
gastro_df['rest_norm'] = (gastro_df['n_rest'] - gastro_df['n_rest'].min()) / (gastro_df['n_rest'].max() - gastro_df['n_rest'].min())
gastro_df['bars_norm'] = (gastro_df['n_bars'] - gastro_df['n_bars'].min()) / (gastro_df['n_bars'].max() - gastro_df['n_bars'].min())
gastro_df['pubs_norm'] = (gastro_df['n_pubs'] - gastro_df['n_pubs'].min()) / (gastro_df['n_pubs'].max() - gastro_df['n_pubs'].min())
In [ ]:
gastro_df.head()
Out[ ]:
| id | geometry | n_rest | n_bars | n_pubs | n_gastro | rest_% | bars_% | pubs_% | rest_norm | bars_norm | pubs_norm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 1 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 2 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 3 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 4 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
In [ ]:
norms = gastro_df.iloc[:, -3:]
gastro_df = gastro_df.iloc[:, :9]
In [ ]:
norms.head()
Out[ ]:
| rest_norm | bars_norm | pubs_norm | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.0 |
| 2 | 0.0 | 0.0 | 0.0 |
| 3 | 0.0 | 0.0 | 0.0 |
| 4 | 0.0 | 0.0 | 0.0 |
In [ ]:
most_prevalent_type = norms.idxmax(axis=1)
norms['check'] = norms[['rest_norm', 'bars_norm', 'pubs_norm']].max(axis=1)
norms['most_prevalent'] = most_prevalent_type
# norms['check'] = norms['rest_norm'] + norms['bars_norm'] + norms['pubs_norm']
norms.head()
Out[ ]:
| rest_norm | bars_norm | pubs_norm | check | most_prevalent | |
|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | rest_norm |
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | rest_norm |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | rest_norm |
| 3 | 0.0 | 0.0 | 0.0 | 0.0 | rest_norm |
| 4 | 0.0 | 0.0 | 0.0 | 0.0 | rest_norm |
In [ ]:
def fix_prevalent(most_prevalent, check):
if check == 0:
return 'brak'
else:
return most_prevalent
In [ ]:
norms['most_prevalent'] = norms.apply(lambda x: fix_prevalent(x['most_prevalent'], x['check']), axis=1)
In [ ]:
norms['most_prevalent'].value_counts()
Out[ ]:
most_prevalent rest_norm 3467 brak 3360 pubs_norm 1410 bars_norm 306 Name: count, dtype: int64
In [ ]:
print(norms.shape[0], gastro_df.shape[0])
8543 8543
In [ ]:
gastro_join = gastro_df.join(norms, how='outer', lsuffix='_left', rsuffix='_right')
gastro_join.head()
Out[ ]:
| id | geometry | n_rest | n_bars | n_pubs | n_gastro | rest_% | bars_% | pubs_% | rest_norm | bars_norm | pubs_norm | check | most_prevalent | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 1 | 1 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 2 | 2 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 3 | 3 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 4 | 4 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
from shapely.geometry import Polygon
gastro_join_copy = gastro_join.copy()
# Kolory kategorii
base_colors = {
'rest_norm': '#636EFA',
'bars_norm': '#EF553B',
'pubs_norm': '#00CC96',
'brak': '#e6e6e6'
}
legend_labels = {
'rest_norm': 'Restauracje',
'bars_norm': 'Bary',
'pubs_norm': 'Puby',
'brak': 'None'
}
# funkcja nasycenia koloru
def shade_color(base_color, value):
base_color_rgb = mcolors.hex2color(base_color)
white_rgb = (1, 1, 1)
shaded_color = mcolors.to_hex([base_color_rgb[i] * value + white_rgb[i] * (1 - value) for i in range(3)])
return shaded_color
gastro_join_copy['check'] = gastro_join_copy['check'].clip(0, 1)
# kolor na podstawie funckji nasycenia
gastro_join_copy['color'] = gastro_join_copy.apply(lambda x: shade_color(base_colors[x['most_prevalent']], x['check']), axis=1)
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
for category in base_colors.keys():
subset = gastro_join_copy[gastro_join_copy['most_prevalent'] == category]
subset.plot(ax=ax, color=subset['color'], edgecolor=None)
legend_patches = [mpatches.Patch(color=shade_color(base_colors[category], 0.7), label=legend_labels[category]) for category in base_colors.keys()]
plt.legend(handles=legend_patches)
ax.set_title(f"Dominujący rodzaj gastro (po normalizacji)");
plt.show()
In [ ]:
import geopandas as gpd
import folium
import matplotlib.colors as mcolors
from shapely.geometry import Polygon
gastro_join_copy_fol = gastro_join_copy.query('check >= 0.1')
f = folium.Figure(width=1200, height=600)
m = folium.Map(location=[gastro_join_copy_fol.geometry.centroid.y.mean(), gastro_join_copy_fol.geometry.centroid.x.mean()], zoom_start=12, tiles='CartoDB Positron')
m.add_to(f)
def style_function(feature):
return {
'fillColor': feature['properties']['color'],
'color': feature['properties']['color'],
'weight': 0.5,
'fillOpacity': 0.7
}
gastro_geojson = gastro_join_copy_fol.to_json()
folium.GeoJson(
gastro_geojson,
style_function=style_function,
tooltip=folium.GeoJsonTooltip(fields=['most_prevalent', 'check'])
).add_to(m)
legend_html = """
<div style="position: fixed;
bottom: 50px; left: 50px; width: 150px; height: 100px;
background-color: white; z-index:9999; font-size:14px;
border:2px solid grey; padding: 10px;">
<b>Legenda</b><br>
<i class="fa fa-square" style="color:#636EFA"></i> Restauracje<br>
<i class="fa fa-square" style="color:#EF553B"></i> Bary<br>
<i class="fa fa-square" style="color:#00CC96"></i> Puby<br>
</div>
"""
m.get_root().html.add_child(folium.Element(legend_html))
m.save('map.html')
f
<ipython-input-106-d8a7f9824ce6>:12: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. m = folium.Map(location=[gastro_join_copy_fol.geometry.centroid.y.mean(), gastro_join_copy_fol.geometry.centroid.x.mean()], zoom_start=12, tiles='CartoDB Positron')
Out[ ]:
In [ ]:
gastro_join_copy.head()
Out[ ]:
| id | geometry | n_rest | n_bars | n_pubs | n_gastro | rest_% | bars_% | pubs_% | rest_norm | bars_norm | pubs_norm | check | most_prevalent | color | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak | #ffffff |
| 1 | 1 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak | #ffffff |
| 2 | 2 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak | #ffffff |
| 3 | 3 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak | #ffffff |
| 4 | 4 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak | #ffffff |
In [ ]:
perc = gastro_join_copy[['rest_%', 'bars_%', 'pubs_%', ]].copy()
perc.head(1)
Out[ ]:
| rest_% | bars_% | pubs_% | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 |
In [ ]:
most_prevalent_perc = perc.idxmax(axis=1)
perc['check_perc'] = perc[['rest_%', 'bars_%', 'pubs_%', ]].max(axis=1)
perc['most_prevalent_perc'] = most_prevalent_perc
In [ ]:
perc['most_prevalent_perc'] = perc.apply(lambda x: fix_prevalent(x['most_prevalent_perc'], x['check_perc']), axis=1)
In [ ]:
perc.head()
Out[ ]:
| rest_% | bars_% | pubs_% | check_perc | most_prevalent_perc | |
|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 3 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 4 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
In [ ]:
print(perc.shape, gastro_join.shape)
(8543, 5) (8543, 14)
In [ ]:
gastro_join.head()
Out[ ]:
| id | geometry | n_rest | n_bars | n_pubs | n_gastro | rest_% | bars_% | pubs_% | rest_norm | bars_norm | pubs_norm | check | most_prevalent | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 1 | 1 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 2 | 2 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 3 | 3 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 4 | 4 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | brak |
In [ ]:
gastro_join_perc = gastro_join[['id', 'geometry']].join(perc, how='outer', lsuffix='_left', rsuffix='_right')
In [ ]:
gastro_join_perc.head()
Out[ ]:
| id | geometry | rest_% | bars_% | pubs_% | check_perc | most_prevalent_perc | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | POLYGON ((20.95100 52.36976, 20.95091 52.36752... | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 1 | 1 | POLYGON ((20.95831 52.36964, 20.95821 52.36740... | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 2 | 2 | POLYGON ((20.96196 52.36957, 20.96186 52.36734... | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 3 | 3 | POLYGON ((20.96561 52.36951, 20.96551 52.36728... | 0.0 | 0.0 | 0.0 | 0.0 | brak |
| 4 | 4 | POLYGON ((21.05692 52.36796, 21.05681 52.36572... | 0.0 | 0.0 | 0.0 | 0.0 | brak |
In [ ]:
gastro_join_perc['most_prevalent_perc'].value_counts()
Out[ ]:
most_prevalent_perc rest_% 5034 brak 3360 pubs_% 78 bars_% 71 Name: count, dtype: int64
In [ ]:
gastro_join_perc_copy = gastro_join_perc.copy()
base_colors = {
'rest_%': '#636EFA',
'bars_%': '#EF553B',
'pubs_%': '#00CC96',
'brak': '#e6e6e6'
}
legend_labels = {
'rest_%': 'Restaurants',
'bars_%': 'Bars',
'pubs_%': 'Pubs',
'brak': 'None'
}
def shade_color(base_color, value):
base_color_rgb = mcolors.hex2color(base_color)
white_rgb = (1, 1, 1)
shaded_color = mcolors.to_hex([base_color_rgb[i] * value + white_rgb[i] * (1 - value) for i in range(3)])
return shaded_color
gastro_join_perc_copy['check_perc'] = gastro_join_perc_copy['check_perc'].clip(0, 1)
gastro_join_perc_copy['color_perc'] = gastro_join_perc_copy.apply(lambda x: shade_color(base_colors[x['most_prevalent_perc']], x['check_perc']), axis=1)
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
for category in base_colors.keys():
subset = gastro_join_perc_copy[gastro_join_perc_copy['most_prevalent_perc'] == category]
subset.plot(ax=ax, color=subset['color_perc'], edgecolor=None)
legend_patches = [mpatches.Patch(color=shade_color(base_colors[category], 0.7), label=legend_labels[category]) for category in base_colors.keys()]
plt.legend(handles=legend_patches)
ax.set_title(f"Dominujący rodzaj gastro (%)");
plt.show()
In [ ]:
gastro_join_perc_copy = gastro_join_perc.copy()
base_colors = {
'rest_%': '#636EFA',
'bars_%': '#EF553B',
'pubs_%': '#00CC96',
'brak': '#e6e6e6'
}
legend_labels = {
'rest_%': 'Restauracje',
'bars_%': 'Bary',
'pubs_%': 'Puby',
'brak': 'brak'
}
def shade_color(base_color, value):
base_color_rgb = mcolors.hex2color(base_color)
white_rgb = (1, 1, 1)
shaded_color = mcolors.to_hex([base_color_rgb[i] * value + white_rgb[i] * (1 - value) for i in range(3)])
return shaded_color
gastro_join_perc_copy['check_perc'] = gastro_join_perc_copy['check_perc'].clip(0, 1)
gastro_join_perc_copy['color_perc'] = gastro_join_perc_copy.apply(lambda x: shade_color(base_colors[x['most_prevalent_perc']], x['check_perc']), axis=1)
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
for category in base_colors.keys():
subset = gastro_join_perc_copy[gastro_join_perc_copy['most_prevalent_perc'] == category]
subset.plot(ax=ax, color=subset['color_perc'], edgecolor=None)
legend_patches = [mpatches.Patch(color=shade_color(base_colors[category], 0.7), label=legend_labels[category]) for category in base_colors.keys()]
ax.legend(handles=legend_patches)
msize = 9
restaurants.plot(color='#636EFA', label=f'Restauracje ({restaurants.shape[0]})', markersize=msize, ax=ax, edgecolor="black", linewidth=0.5)
bars.plot(color='#EF553B', label=f'Bary ({bars.shape[0]})', markersize=msize, ax=ax, edgecolor="black", linewidth=0.5)
pubs.plot(color='#00CC96', label=f'Puby ({pubs.shape[0]})', markersize=msize, ax=ax, edgecolor="black", linewidth=0.5)
ax.set_title("Dominujący rodzaj gastro (%)")
plt.show()
Tresholds (?) - z tutoriala Helsinek¶
In [ ]:
# Extract the grid cells within given travel time threshold
threshold = 45
access = travel_time_df.loc[travel_time_df["travel_time_TRANSIT"] <= threshold].copy()
In [ ]:
print(access.shape)
access.head(3)
(2138, 12)
Out[ ]:
| id | zaludnienie | geometry | from_id_x | to_id_x | travel_time_TRANSIT | from_id_y | to_id_y | travel_time_BIKE | difference | to_id | tt_to_closest_library | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1566 | 1566 | 8426 | POLYGON ((21.04985 52.29419, 21.04975 52.29195... | 0 | 1566 | 45.0 | 0 | 1566 | 62.0 | -17.0 | 1566 | 13.0 |
| 1593 | 1593 | 4175 | POLYGON ((20.92578 52.29404, 20.92568 52.29180... | 0 | 1593 | 45.0 | 0 | 1593 | 61.0 | -16.0 | 1593 | 17.0 |
| 1594 | 1594 | 6152 | POLYGON ((20.92943 52.29398, 20.92933 52.29174... | 0 | 1594 | 43.0 | 0 | 1594 | 58.0 | -15.0 | 1594 | 15.0 |
In [ ]:
ax = access.plot(column="zaludnienie", figsize=(10,5), legend=True)
ax.set_title(f"The number of people living within {threshold} minutes travel time threshold.");
In [ ]:
pop_within_threshold = access["zaludnienie"].sum()
pop_share = pop_within_threshold / travel_time_df["zaludnienie"].sum()
print(f"Population within accessibility thresholds: {pop_within_threshold} ({pop_share*100:.0f} %)")
Population within accessibility thresholds: 15612445 (52 %)
In [ ]:
threshold = 15
# Count the number of opportunities from each grid cell
opportunities = travel_time_matrix.loc[travel_time_matrix["travel_time"]<=threshold].groupby("to_id")["from_id"].count().reset_index()
# Rename the column for more intuitive one
opportunities = opportunities.rename(columns={"from_id": "num_opportunities"})
In [ ]:
# Merge with population grid
opportunities = pop_grid.merge(opportunities, left_on="id", right_on="to_id")
In [ ]:
opportunities.head(3)
Out[ ]:
| id | zaludnienie | geometry | to_id | num_opportunities | |
|---|---|---|---|---|---|
| 0 | 134 | 627 | POLYGON ((20.93956 52.35875, 20.93946 52.35651... | 134 | 1 |
| 1 | 180 | 1328 | POLYGON ((20.93946 52.35651, 20.93936 52.35427... | 180 | 2 |
| 2 | 227 | 1903 | POLYGON ((20.93571 52.35433, 20.93561 52.35210... | 227 | 1 |
In [ ]:
fig, ax = plt.subplots(figsize=(9, 9))
opportunities.plot(column="num_opportunities", scheme="natural_breaks", k=12, legend=True, ax=ax)
ax.set_title(f"Number of opportunities within {threshold} minutes.");
plt.show()
Mieszkania - fin¶
Lepsze, ale bardziej skomplikowane podejście do wykorzystania odległości w regresji liniowej
In [ ]:
import pandas as pd
In [ ]:
mieszkania = pd.read_csv('/content/drive/MyDrive/Projekt zaliczeniowy/Mieszkania Warszawy/mieszkania_DS3_obróbka.csv')
In [ ]:
mieszkania.drop('link', axis=1, inplace=True)
In [ ]:
mieszkania_g = gpd.GeoDataFrame(
mieszkania,
geometry=gpd.points_from_xy(mieszkania.long, mieszkania.latt), crs="EPSG:4326"
)
In [ ]:
mieszkania_g.plot()
Out[ ]:
<Axes: >
In [ ]:
display(mieszkania_g.query('long > 21.5'))
display(mieszkania_g.query('latt < 52'))
| Unnamed: 0 | numer | Cena | powierzchnia | lokalizacja | n_pokojow | pietro | czynsz | wlasnosc | wykonczenie | ... | ogloszenie | rok_budowy | zabudowa | winda | latt | long | Dystans do Swietokrzyskiej | najblizsza stacja | dystans do najblizszej stacji [m] | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2140 | 2140 | 3345 | 560000 | 70.5 | Wesoła | 3 | 3 | 750.0 | spółdzielcze wł. prawo do lokalu | do zamieszkania | ... | prywatny | 2011 | blok | nie | 52.173885 | 21.546202 | 37.392421 | Kabaty | 33230.191707 | POINT (21.54620 52.17388) |
1 rows × 24 columns
| Unnamed: 0 | numer | Cena | powierzchnia | lokalizacja | n_pokojow | pietro | czynsz | wlasnosc | wykonczenie | ... | ogloszenie | rok_budowy | zabudowa | winda | latt | long | Dystans do Swietokrzyskiej | najblizsza stacja | dystans do najblizszej stacji [m] | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 919 | 919 | 1288 | 605000 | 49.95 | Rembertów | 3 | 5 | 670.0 | pełna własność | do zamieszkania | ... | biuro nieruchomości | 2022 | blok | nie | 51.95739 | 20.83979 | 33.002902 | Kabaty | 24787.867532 | POINT (20.83979 51.95739) |
1 rows × 24 columns
In [ ]:
mieszkania_g.drop([919, 2140], inplace=True)
mieszkania_g.plot()
Out[ ]:
<Axes: >
In [ ]:
m1 = pd.read_csv('/content/drive/MyDrive/Projekt zaliczeniowy/Mieszkania Warszawy/Metro1.csv')
m2 = pd.read_csv('/content/drive/MyDrive/Projekt zaliczeniowy/Mieszkania Warszawy/Metro2.csv')
m1['Linia'] = 'M1'
m2['Linia'] = 'M2'
m1.head(1)
Out[ ]:
| order | stacja | latitude | longitude | Linia | |
|---|---|---|---|---|---|
| 0 | 1 | Kabaty | 52.131262 | 21.065808 | M1 |
In [ ]:
m1_g = gpd.GeoDataFrame(
m1,
geometry=gpd.points_from_xy(m1.longitude, m1.latitude), crs="EPSG:4326"
)
m2_g = gpd.GeoDataFrame(
m2,
geometry=gpd.points_from_xy(m2.longitude, m2.latitude), crs="EPSG:4326"
)
In [ ]:
fig, ax = plt.subplots(figsize=(7, 7))
mieszkania_g.plot(color='#00CC96', label=f'Mieszkania', markersize=4, ax=ax)
m1_g.plot(color='#0070c0', label=f'Linia M1', markersize=9, ax=ax)
m2_g.plot(color='#ff0000', label=f'Linia M2', markersize=9, ax=ax)
ax.legend()
plt.show()
In [ ]:
metro = pd.concat([m1, m2], axis=0)
In [ ]:
metro = metro.copy()
In [ ]:
metro = metro.reset_index().reset_index() \
.drop(['index', 'order'], axis=1) \
.rename(columns={'level_0':'id'})
In [ ]:
metro.head()
Out[ ]:
| id | stacja | latitude | longitude | Linia | |
|---|---|---|---|---|---|
| 0 | 0 | Kabaty | 52.131262 | 21.065808 | M1 |
| 1 | 1 | Natolin | 52.140313 | 21.057601 | M1 |
| 2 | 2 | Imielin | 52.149739 | 21.045623 | M1 |
| 3 | 3 | Stokłosy | 52.156171 | 21.032038 | M1 |
| 4 | 4 | Ursynów | 52.162061 | 21.024950 | M1 |
In [ ]:
metro_g = gpd.GeoDataFrame(
metro,
geometry=gpd.points_from_xy(metro.longitude, metro.latitude), crs="EPSG:4326"
)
In [ ]:
metro_g.to_crs(pyproj.CRS.from_epsg(4326), inplace=True)
mieszkania_g.to_crs(pyproj.CRS.from_epsg(4326), inplace=True)
In [ ]:
mieszkania_g = mieszkania_g.reset_index()
# mieszkania = mieszkania.drop('order', axis=1)
mieszkania_g = mieszkania_g.rename(columns={'index':'id'})
In [ ]:
mieszkania_g.head(1)
Out[ ]:
| id | Unnamed: 0 | numer | Cena | powierzchnia | lokalizacja | n_pokojow | pietro | czynsz | wlasnosc | ... | ogloszenie | rok_budowy | zabudowa | winda | latt | long | Dystans do Swietokrzyskiej | najblizsza stacja | dystans do najblizszej stacji [m] | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 1455000 | 74.0 | Wilanów | 4 | 2 | 1070.0 | pełna własność | ... | biuro nieruchomości | 2017 | apartamentowiec | tak | 52.160458 | 21.083189 | 9.763242 | Imielin | 2856.540181 | POINT (21.08319 52.16046) |
1 rows × 25 columns
In [ ]:
travel_time_matrix_computer = TravelTimeMatrixComputer(
transport_network,
origins=metro_g,
destinations=mieszkania_g,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.TRANSIT, TransportMode.WALK],
)
travel_time_matrix = travel_time_matrix_computer.compute_travel_times()
/usr/local/lib/python3.10/dist-packages/r5py/r5/regional_task.py:223: RuntimeWarning: Departure time 2024-06-08 18:30:00 is outside of the time range covered by currently loaded GTFS data sets. warnings.warn(
In [ ]:
travel_time_matrix.head(3)
Out[ ]:
| from_id | to_id | travel_time | |
|---|---|---|---|
| 0 | 0 | 0 | 39.0 |
| 1 | 0 | 1 | 50.0 |
| 2 | 0 | 2 | 37.0 |
In [ ]:
# czas do najbliższej stacji
closest_metro_station = travel_time_matrix.groupby("to_id")["travel_time"].min().reset_index()
closest_metro_station = closest_metro_station.rename(columns={'travel_time':'tt_to_closest_metro_station'})
print(closest_metro_station.shape)
closest_metro_station.head()
(2261, 2)
Out[ ]:
| to_id | tt_to_closest_metro_station | |
|---|---|---|
| 0 | 0 | 26.0 |
| 1 | 1 | 26.0 |
| 2 | 2 | 10.0 |
| 3 | 3 | 14.0 |
| 4 | 4 | 14.0 |
In [ ]:
mieszkania_m = mieszkania_g.merge(closest_metro_station, left_on="id", right_on='to_id')
In [ ]:
mieszkania_m.head(3)
Out[ ]:
| id | Unnamed: 0 | numer | Cena | powierzchnia | lokalizacja | n_pokojow | pietro | czynsz | wlasnosc | ... | zabudowa | winda | latt | long | Dystans do Swietokrzyskiej | najblizsza stacja | dystans do najblizszej stacji [m] | geometry | to_id | tt_to_closest_metro_station | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 1455000 | 74.0 | Wilanów | 4 | 2 | 1070.0 | pełna własność | ... | apartamentowiec | tak | 52.160458 | 21.083189 | 9.763242 | Imielin | 2856.540181 | POINT (21.08319 52.16046) | 0 | 26.0 |
| 1 | 1 | 1 | 3 | 545000 | 34.1 | Praga-Południe | 1 | 4 | 550.0 | pełna własność | ... | blok | tak | 52.250103 | 21.086533 | 5.586225 | Stadion Narodowy | 2963.068443 | POINT (21.08653 52.25010) | 1 | 26.0 |
| 2 | 2 | 2 | 4 | 1450100 | 85.3 | Śródmieście | 4 | 3 | 850.0 | pełna własność | ... | blok | tak | 52.227139 | 21.004595 | 0.935946 | Centrum | 553.974822 | POINT (21.00459 52.22714) | 2 | 10.0 |
3 rows × 27 columns
In [ ]:
f = folium.Figure(width=1200, height=600)
m = mieszkania_m.explore("tt_to_closest_metro_station", cmap="RdYlBu", max_zoom=13, tiles="Cartodb Positron")
# m = origin.explore(m=m, color="red", marker_kwds={"radius": 10})
m.add_to(f)
f
Out[ ]:
In [ ]:
stacja_q = "Kabaty"
In [ ]:
st = metro_g[metro_g['stacja'] == stacja_q]
st
Out[ ]:
| id | stacja | latitude | longitude | Linia | geometry | |
|---|---|---|---|---|---|---|
| 0 | 0 | Kabaty | 52.131262 | 21.065808 | M1 | POINT (21.06581 52.13126) |
In [ ]:
st_name= st['stacja'].to_string()
st_name
Out[ ]:
'0 Kabaty'
In [ ]:
print(st_name)
0 Kabaty
In [ ]:
travel_time_matrix_computer = TravelTimeMatrixComputer(
transport_network,
origins=st,
destinations=mieszkania_g,
departure=datetime.datetime(2024,6,8,18,30),
transport_modes=[TransportMode.TRANSIT, TransportMode.WALK],
)
travel_time_matrix = travel_time_matrix_computer.compute_travel_times()
/usr/local/lib/python3.10/dist-packages/r5py/r5/regional_task.py:223: RuntimeWarning: Departure time 2024-06-08 18:30:00 is outside of the time range covered by currently loaded GTFS data sets. warnings.warn(
In [ ]:
mieszkania_st = mieszkania_m.copy()
In [ ]:
mieszkania_st = mieszkania_st.merge(travel_time_matrix, left_on="id", right_on="to_id")
mieszkania_st = mieszkania_st.rename(columns={"travel_time": f"tt_to_{stacja_q}", })
mieszkania_st.head()
Out[ ]:
| id | Unnamed: 0 | numer | Cena | powierzchnia | lokalizacja | n_pokojow | pietro | czynsz | wlasnosc | ... | long | Dystans do Swietokrzyskiej | najblizsza stacja | dystans do najblizszej stacji [m] | geometry | to_id_x | tt_to_closest_metro_station | from_id | to_id_y | tt_to_Kabaty | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 1455000 | 74.00 | Wilanów | 4 | 2 | 1070.0 | pełna własność | ... | 21.083189 | 9.763242 | Imielin | 2856.540181 | POINT (21.08319 52.16046) | 0 | 26.0 | 0 | 0 | 39.0 |
| 1 | 1 | 1 | 3 | 545000 | 34.10 | Praga-Południe | 1 | 4 | 550.0 | pełna własność | ... | 21.086533 | 5.586225 | Stadion Narodowy | 2963.068443 | POINT (21.08653 52.25010) | 1 | 26.0 | 0 | 1 | 50.0 |
| 2 | 2 | 2 | 4 | 1450100 | 85.30 | Śródmieście | 4 | 3 | 850.0 | pełna własność | ... | 21.004595 | 0.935946 | Centrum | 553.974822 | POINT (21.00459 52.22714) | 2 | 10.0 | 0 | 2 | 38.0 |
| 3 | 3 | 3 | 5 | 1938400 | 122.16 | Ursynów | 5 | 0 | NaN | pełna własność | ... | 21.058710 | 12.454111 | Kabaty | 623.714851 | POINT (21.05871 52.12764) | 3 | 14.0 | 0 | 3 | 14.0 |
| 4 | 4 | 4 | 6 | 1836600 | 126.63 | Ursynów | 4 | 0 | NaN | pełna własność | ... | 21.058710 | 12.454111 | Kabaty | 623.714851 | POINT (21.05871 52.12764) | 4 | 14.0 | 0 | 4 | 14.0 |
5 rows × 30 columns
In [ ]:
f = folium.Figure(width=1600, height=900)
m = mieszkania_st.explore(f"tt_to_{stacja_q}", cmap="RdYlBu", max_zoom=13, tiles="Cartodb Positron")
# m = origin.explore(m=m, color="red", marker_kwds={"radius": 10})
m.add_to(f)
f
Out[ ]:
Widać, że transportem miejskim ciężko dostać się na Kabaty z Wawra, mimo że dzielnice sąsiadują ze sobą. Metoda z OSM uwzględnia utrudniony dojazd wynikający tu rozdzielającej dzielnice Wisły i braku transportu.